from Utils.Function import *
from Utils.RedisHelper import RedisHelper
import time

class IP89Spider(object):
    """Spider that collects proxy ip/port pairs from the 89ip.cn listing pages.

    Each listing page is downloaded with ``GetHtml``, parsed with
    ``GetDataByXpath`` using the XPath expressions in ``config``, and every
    extracted entry is handed to ``RedisHelper.temp_set``.
    """

    # Scrape settings. ``getlist`` reads ``name`` and ``urls`` itself and
    # passes the whole dict to ``GetDataByXpath``. The remaining keys
    # (``keyword``, ``interval``, ``protocol``, ``type``, ``enable``) are not
    # read in this class -- presumably they are consumed by the helpers or a
    # scheduler; verify against those callers before changing them.
    config = {
        'name': '89IP',
        # Listing pages index_1.html .. index_19.html.
        'urls': ['http://www.89ip.cn/index_%s.html' % i for i in range(1, 20)],
        'keyword': '</html>',
        'interval': 60,
        'protocol': '',
        'ip': '//table[@class="layui-table"]/tbody/tr/td[1]/text()',
        'port': '//table[@class="layui-table"]/tbody/tr/td[2]/text()',
        'type': 'xpath',
        'enable': 1
    }

    @staticmethod
    def _build_record(item):
        """Return the normalised record stored for one scraped entry.

        :param item: mapping with ``ip``, ``protocol`` and ``port`` keys;
            ``ip`` and ``port`` must be strings.
        :return: dict with the ip and port stripped of surrounding
            whitespace and the protocol converted to a lower-case string.
        """
        return {
            'ip': item['ip'].strip(),
            'protocol': str(item['protocol']).lower(),
            'port': item['port'].strip(),
        }

    def getlist(self):
        """Scrape every configured page and store the proxies that were found.

        Progress is reported on stdout. Pages for which ``GetHtml`` returns
        nothing are skipped. Returns ``None``.
        """
        config = self.config
        name = config['name']
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + ':抓取 ' + name + ' 开始')

        # One helper instance serves the whole run; there is no need to
        # build a new one for every page.
        redis = RedisHelper()
        n = 0
        for url in config['urls']:
            htmlcontent = GetHtml(url)
            # Skip on any empty result (not just ''), so a None never reaches
            # the parser.
            if not htmlcontent:
                continue
            for item in GetDataByXpath(htmlcontent, config):
                data = self._build_record(item)
                # Key by the stripped ip so the key always matches the
                # stored record.
                redis.temp_set(data['ip'], data)
                n += 1
            print('正在抓取 ' + name + ',当前抓取到 ' + str(n) + " 条记录")
            # Pause between successfully fetched pages.
            time.sleep(2)
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) + ':抓取 ' + name + ' 结束,共抓取到 ' + str(n) + " 条记录")


if __name__ == '__main__':
    # Executed as a script: perform a single scrape run.
    IP89Spider().getlist()

